******************************
* SET OPTIONS; SET LOCATIONS *
******************************
* Start from an empty session, disable output paging and command tracing,
* and close any log file that is still open (capture: no error if none is).
clear all
set more off
set trace off
capture log close

* CHANGE THIS TO THE DIRECTORY THAT HAS THE FILES
* NOTE(review): external_dir is not referenced in the visible part of this script.
global external_dir "D:\Dropbox\Michigan\RDC Project\AEJ Data Set\"

* This is an internal RDC directory; every use/merge/save below is relative to it.
global internal_dir "\projects\"


********************************
* Merge key variables together *
********************************
* Load the combined Census/ACS flow estimates as the master dataset.
use "$internal_dir/data/combined_Census_ACS_flows.dta", clear

	*************************************
	* USER ACTION REQUIRED: country_str *
	*************************************
	* Note: at this point the user must create a variable, *country_str*.
	*	It is simply a string version of the variable *country* that enables
	*	matching with the external data. The country names that correspond to
	*	*country* are located in the RDC documentation for the variable *pob*,
	*	starting in 2000 and for all ACS surveys afterwards.

* Hurricane index (HI): matched one-to-one on country_str and year
merge 1:1 country_str year using "$internal_dir\external_data\hurricane_index.dta"
drop _merge
label values country country_lab
drop if year < 1950

* 1980 stocks: one record per country, spread over all of that country's years
merge m:1 country using "$internal_dir/data/Census 1980/census1980est_stock.dta"
drop _merge

* Populations: one record per country_str
* (_merge from this merge is intentionally left in the data, as in the original flow)
merge m:1 country_str using "$internal_dir\external_data\populations.dta"

* Combined outcome: Census 2000 estimates for 1999 and before, ACS estimates after
generate inflows = cond(year <= 1999, flows_Census, flows_ACS)


*************************************
* Finalize dataset without controls *
*************************************

* Drop aggregate regions and excluded countries to create the final sample
drop if country == 462                              /* "Africa" */
drop if country == 249                              /* "Asia" */
drop if country == 528                              /* "Oceania" */
drop if country < 200                               /* Drop all of Europe */
drop if inlist(country, 218, 219, 246, 241, 244)    /* Drop former Soviet Union */
drop if country == 221                              /* Drop North Korea */

* Keep necessary variables and years.
* country_str must be retained: the external-controls merge further down keys on
* country_str and year, and would abort with a variable-not-found error if the
* saved panel did not carry it.
keep country country_str year inflows stock1980 population1980 HI

* Require a non-missing outcome and a non-missing 1980 stock
drop if inflows == .
drop if stock1980 == .

* Restrict to the 1980-2004 window (observations with missing year are dropped too)
keep if inrange(year, 1980, 2004)

* Save panel for future use (temporary file, reloaded later in this script)
tempfile panel
save `panel', replace

			
*********************************************
* HHI control variable (Created internally) *
*********************************************
* Build a country-level concentration index from the 1980 county-level stocks:
* HHI = sum over counties of (county stock / country total stock)^2.
use "$internal_dir/data/Census 1980/census1980est_stock_bycounty.dta", clear

* Scratch variables; both disappear in the collapse below
tempvar country_total county_share

* Country-wide stock, repeated on every county row of that country
egen `country_total' = total(stock_by_county), by(country)

* Each county's share of its country's stock
generate `county_share' = stock_by_county / `country_total'

* Sum of squared shares, constant within country
egen HHI = total(`county_share'^2), by(country)

* One row per country: HHI is constant within country so its mean returns it
* unchanged; sample is summed across counties.
collapse (mean) HHI_70s = HHI (sum) sample_HHI = sample, by(country)

* Flag is 0 for every country that has an HHI record
generate missing_HHI = 0

tempfile HHI
save `HHI', replace


***************************************
* Merge in external control variables *
***************************************
* Reload the country-year panel saved earlier in this script
use `panel', clear

* External controls: matched one-to-one on country_str and year.
* The assert stops the script unless every observation matched on both sides.
merge 1:1 country_str year using "$internal_dir\external_data\external_controls.dta"
assert _merge == 3
drop _merge

* HHI (internally created): one record per country.
* Countries that appear only in the HHI file are discarded; panel countries
* without an HHI record are kept with HHI_70s, sample_HHI and missing_HHI missing.
* NOTE(review): missing_HHI is never set to 1 here - presumably handled downstream; confirm.
merge m:1 country using `HHI'
keep if _merge != 2
drop _merge

* Save dataset with control variables
* NOTE: CHECK THAT COUNTRIES MATCH TABLE 1 HERE
save "$internal_dir/data/panel.dta", replace

